{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Multivariate Adaptive Regression Splines (MARS) Part 2"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yfinance is used to fetch data \n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:11.529Z",
          "iopub.execute_input": "2022-05-28T04:30:11.534Z",
          "shell.execute_reply": "2022-05-28T04:30:11.564Z",
          "iopub.status.idle": "2022-05-28T04:30:11.571Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:11.578Z",
          "iopub.execute_input": "2022-05-28T04:30:11.582Z",
          "iopub.status.idle": "2022-05-28T04:30:12.257Z",
          "shell.execute_reply": "2022-05-28T04:30:12.224Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.tail()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/plain": "                 Open   High        Low      Close  Adj Close     Volume\nDate                                                                    \n2018-08-20  19.790001  20.08  19.350000  19.980000  19.980000   62983200\n2018-08-21  19.980000  20.42  19.860001  20.400000  20.400000   55629000\n2018-08-22  20.280001  20.92  20.209999  20.900000  20.900000   62002700\n2018-08-23  21.190001  22.32  21.139999  22.290001  22.290001  113444100\n2018-08-24  22.910000  24.00  22.670000  23.980000  23.980000  164328200",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2018-08-20</th>\n      <td>19.790001</td>\n      <td>20.08</td>\n      <td>19.350000</td>\n      <td>19.980000</td>\n      <td>19.980000</td>\n      <td>62983200</td>\n    </tr>\n    <tr>\n      <th>2018-08-21</th>\n      <td>19.980000</td>\n      <td>20.42</td>\n      <td>19.860001</td>\n      <td>20.400000</td>\n      <td>20.400000</td>\n      <td>55629000</td>\n    </tr>\n    <tr>\n      <th>2018-08-22</th>\n      <td>20.280001</td>\n      <td>20.92</td>\n      <td>20.209999</td>\n      <td>20.900000</td>\n      <td>20.900000</td>\n      <td>62002700</td>\n    </tr>\n    <tr>\n      <th>2018-08-23</th>\n      <td>21.190001</td>\n      <td>22.32</td>\n      <td>21.139999</td>\n      <td>22.290001</td>\n      <td>22.290001</td>\n      <td>113444100</td>\n    </tr>\n    <tr>\n      <th>2018-08-24</th>\n      <td>22.910000</td>\n      <td>24.00</td>\n      <td>22.670000</td>\n      <td>23.980000</td>\n      <td>23.980000</td>\n      <td>164328200</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:12.265Z",
          "iopub.execute_input": "2022-05-28T04:30:12.269Z",
          "iopub.status.idle": "2022-05-28T04:30:12.279Z",
          "shell.execute_reply": "2022-05-28T04:30:12.229Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset.iloc[ : , 1].values\n",
        "y = dataset.iloc[ : , 4].values"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:12.286Z",
          "iopub.execute_input": "2022-05-28T04:30:12.291Z",
          "iopub.status.idle": "2022-05-28T04:30:12.297Z",
          "shell.execute_reply": "2022-05-28T04:30:12.248Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(X.shape)\n",
        "print(y.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(1171,)\n",
            "(1171,)\n"
          ]
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:12.305Z",
          "iopub.execute_input": "2022-05-28T04:30:12.310Z",
          "iopub.status.idle": "2022-05-28T04:30:12.321Z",
          "shell.execute_reply": "2022-05-28T04:30:12.351Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.datasets import make_regression\n",
        "from sklearn.model_selection import cross_val_score\n",
        "from sklearn.model_selection import RepeatedKFold\n",
        "from pyearth import Earth"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:12.327Z",
          "iopub.execute_input": "2022-05-28T04:30:12.332Z",
          "iopub.status.idle": "2022-05-28T04:30:13.128Z",
          "shell.execute_reply": "2022-05-28T04:30:13.119Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model = Earth()"
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:13.135Z",
          "iopub.execute_input": "2022-05-28T04:30:13.140Z",
          "iopub.status.idle": "2022-05-28T04:30:13.147Z",
          "shell.execute_reply": "2022-05-28T04:30:13.359Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model.fit(X, y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 8,
          "data": {
            "text/plain": "Earth()"
          },
          "metadata": {}
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:13.156Z",
          "iopub.execute_input": "2022-05-28T04:30:13.161Z",
          "shell.execute_reply": "2022-05-28T04:30:13.368Z",
          "iopub.status.idle": "2022-05-28T04:30:13.172Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model.score(X, y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "0.9990783557868719"
          },
          "metadata": {}
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "shell.execute_reply": "2022-05-28T04:30:13.391Z",
          "iopub.status.busy": "2022-05-28T04:30:13.179Z",
          "iopub.execute_input": "2022-05-28T04:30:13.184Z",
          "iopub.status.idle": "2022-05-28T04:30:13.195Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print(model.summary())"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Earth Model\n",
            "-------------------------------------\n",
            "Basis Function  Pruned  Coefficient  \n",
            "-------------------------------------\n",
            "(Intercept)     No      19.3159      \n",
            "h(x0-19.71)     No      1.11187      \n",
            "h(19.71-x0)     No      -0.980184    \n",
            "-------------------------------------\n",
            "MSE: 0.0218, GCV: 0.0220, RSQ: 0.9991, GRSQ: 0.9991\n"
          ]
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:13.202Z",
          "iopub.execute_input": "2022-05-28T04:30:13.207Z",
          "iopub.status.idle": "2022-05-28T04:30:13.223Z",
          "shell.execute_reply": "2022-05-28T04:30:13.399Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)\n",
        "print ('Train set:', X_train.shape,  y_train.shape)\n",
        "print ('Test set:', X_test.shape,  y_test.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Train set: (878,) (878,)\n",
            "Test set: (293,) (293,)\n"
          ]
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:13.240Z",
          "iopub.execute_input": "2022-05-28T04:30:13.254Z",
          "iopub.status.idle": "2022-05-28T04:30:13.286Z",
          "shell.execute_reply": "2022-05-28T04:30:13.406Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)\n",
        "\n",
        "scores = cross_val_score(model, X_test, y_test, scoring='neg_mean_absolute_error',\n",
        "                         cv=cv, n_jobs=-1)\n"
      ],
      "outputs": [],
      "execution_count": 12,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:13.305Z",
          "iopub.execute_input": "2022-05-28T04:30:13.319Z",
          "iopub.status.idle": "2022-05-28T04:30:15.326Z",
          "shell.execute_reply": "2022-05-28T04:30:15.318Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model.score(X_test, y_test)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 13,
          "data": {
            "text/plain": "0.9990505799246433"
          },
          "metadata": {}
        }
      ],
      "execution_count": 13,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:15.333Z",
          "iopub.execute_input": "2022-05-28T04:30:15.338Z",
          "iopub.status.idle": "2022-05-28T04:30:15.348Z",
          "shell.execute_reply": "2022-05-28T04:30:15.424Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print('MAE: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MAE: -0.101 (0.018)\n"
          ]
        }
      ],
      "execution_count": 14,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:15.357Z",
          "iopub.execute_input": "2022-05-28T04:30:15.361Z",
          "iopub.status.idle": "2022-05-28T04:30:15.371Z",
          "shell.execute_reply": "2022-05-28T04:30:15.429Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "yhat = model.predict([19.97999954, 20.39999962, 20.89999962, 22.29000092, 23.97999954])"
      ],
      "outputs": [],
      "execution_count": 15,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:15.379Z",
          "iopub.execute_input": "2022-05-28T04:30:15.384Z",
          "iopub.status.idle": "2022-05-28T04:30:15.392Z",
          "shell.execute_reply": "2022-05-28T04:30:15.432Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "print('Prediction: %d' % yhat[0])"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Prediction: 19\n"
          ]
        }
      ],
      "execution_count": 16,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2022-05-28T04:30:15.399Z",
          "iopub.execute_input": "2022-05-28T04:30:15.403Z",
          "iopub.status.idle": "2022-05-28T04:30:15.412Z",
          "shell.execute_reply": "2022-05-28T04:30:15.435Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.13",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "kernelspec": {
      "argv": [
        "C:/Users/Tin Hang/Anaconda3\\python.exe",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}"
      ],
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}